from pyspark.sql import Row

# Create the SparkSession object
from pyspark import SparkContext,SparkConf
from pyspark.sql import SparkSession
def _to_person_row(line):
    """Turn one comma-separated text line into a Row with name and age.

    The first field is used verbatim as ``name``; the second field is
    converted with ``int`` and stored as ``age``.
    """
    fields = line.split(",")
    return Row(name=fields[0], age=int(fields[1]))


def _describe_person(person):
    """Render a result row as a single "Name: ...,Age: ..." string."""
    return "Name: " + person.name + "," + "Age: " + str(person.age)


# Obtain a session (creating one if needed), configured from a default SparkConf.
spark = SparkSession.builder.config(conf=SparkConf()).getOrCreate()

# Read the sample file as an RDD of lines and convert every line to a Row.
people = (
    spark.sparkContext
    .textFile("file:///usr/local/spark/examples/src/main/resources/people.txt")
    .map(_to_person_row)
)
schemaPeople = spark.createDataFrame(people)

# The DataFrame must be registered as a temporary view before the SQL query
# below can refer to it by the name "people".
schemaPeople.createOrReplaceTempView("people")
personsDF = spark.sql("select name,age from people where age > 20")

# Each element of the DataFrame is one record with the fields name and age,
# read here through attribute access on the row.
personsRDD = personsDF.rdd.map(_describe_person)
personsRDD.foreach(print)